Libraries

We need a bunch of these…

JSON processing

First jsonlite for processing the colours data pulled from the website

library(jsonlite)

Data wrangling

Next, a bunch of data munging packages from the ‘tidyverse’

library(plyr) # rbind.fill is useful for ragged data with missing entries
library(dplyr)  
library(magrittr)
library(tibble)
library(stringr) 

Spatial

And finally the R (vector) spatial packages

library(sf)
library(tmap)
library(tmaptools) # for geocode_OSM
tmap_mode("view") # for web maps
## tmap mode set to interactive viewing

Getting the colours

See the Dulux website for what this is all about

First I had a poke around on the website to figure out where the colour details were to be found

colour_groups <- c("blues", "browns", "greens", "greys", "oranges",
                  "purples", "reds", "whites-neutrals", "yellows")
base_url <- "https://www.dulux.co.nz/content/duluxnz/home/colour/all-colours.categorycolour.json/all-colours/"

Retrieve and save the colours

The loop on the next slide

  • steps through the group names
  • retrieves the relevant JSON file
  • writes it out locally
  • adds the colours information to a list
  • then we make the list into a single table using bind_rows

colours <- list()
for (i in 1:length(colour_groups)) {
  colour_group <- colour_groups[i]
  json_url <- str_c(base_url, colour_group)
  json_file_name <- str_c(colour_group, ".json")
  json <- fromJSON(json_url, flatten = TRUE)
  # make a local copy (just for convenience)
  write_json(json, json_file_name)
  # get the colours information and add to list
  the_colours <- rbind.fill(json$categoryColours$masterColour.colours)
  colours[[i]] <- the_colours
  Sys.sleep(0.5) # pause to not annoy the the server  
}
df_colours <- bind_rows(colours)
write.csv(df_colours, "dulux-colours-raw.csv", row.names = FALSE)

Check we’re all good

df_colours <- read.csv("dulux-colours-raw.csv")
head(df_colours)
##       id red green blue lrv      baseId           name woodType coats
## 1 149253 205   210  206  67 vivid_white Pukaki Quarter     None    NA
## 2 149254 220   240  242  86 vivid_white      Canoe Bay     None    NA
## 3 149255 226   240  245  87 vivid_white      Mt Dobson     None    NA
## 4 149256 220   230  235  80 vivid_white        Raetihi     None    NA
## 5 149257 217   219  223  74 vivid_white   Taiaroa Head     None    NA
## 6 149258 180   200  219  60 vivid_white   Gulf Harbour     None    NA

Tidying up the names

There are paint names with modifiers as suffixes for different shades of particular colours, and we need to handle this

The modifiers are

paint_modifiers <- c("Half", "Quarter", "Double")

A tidyverse pipeline

Here’s one way to clean this up (there are others…)

df_colours_tidied <- df_colours %>%
  ## remove some columns we won't be needing
  select(-id, -baseId, -woodType, -coats) %>%
  ## separate the name components, filling from the left with NAs if <5
  separate(name, into = c("p1", "p2", "p3", "p4", "p5"), sep = " ", 
           remove = FALSE, fill = "left") %>%
  ## replace any NAs with an empty string
  mutate(p1 = str_replace_na(p1, ""),
         p2 = str_replace_na(p2, ""),
         p3 = str_replace_na(p3, ""),
         p4 = str_replace_na(p4, "")) %>%
  ## if p5 is a paint modifiers, then recompose name 
  ## from p1:p4 else from p1:p5
  ## similarly keep modifier where it exists
  mutate(placename = if_else(p5 %in% paint_modifiers, 
                       str_trim(str_c(p1, p2, p3, p4, sep = " ")), 
                       str_trim(str_c(p1, p2, p3, p4, p5, sep = " "))),
         modifier = if_else(p5 %in% paint_modifiers, 
                       p5, "")) %>%
  ## remove some places that are tricky to deal with later
  filter(!placename %in% c("Chatham Islands", 
                           "Passage Rock", 
                           "Auckland Islands", 
                           "Cossack Rock")) %>%
  ## throw away variables we no longer and reorder
  select(name, placename, modifier, red, green, blue)

# save it so we have it for later
write.csv(df_colours_tidied, "dulux-colours.csv", row.names = FALSE)

Build the spatial dataset

Add x and y columns to our data for the coordinates—note that we reload from the saved file so as not to keep hitting the Dulux website

df_colours_tidied <- read.csv("dulux-colours.csv")
df_colours_tidied_xy <- df_colours_tidied %>%
  mutate(x = 0, y = 0)

Geocode with tmaptools::geocode_OSM

Code on the next slide:

  • goes through all the unique placenames
  • appends as many x y coordinates as we have space for (due to the modifiers) from the geocoding results

Best not to re-run this (it takes a good 10 minutes and it’s not good to repeatedly geocode and hit the OSM server)

for (placename in unique(df_colours_tidied_xy$placename)) {
  address <- str_c(placename, "New Zealand", sep = ", ")
  geocode <- geocode_OSM(address, as.data.frame = TRUE, return.first.only = FALSE)
  num_geocodes <- nrow(geocode)
  matching_rows <- which(df_colours_tidied_xy$placename == placename)
  for (i in 1:length(matching_rows)) {
    if (!is.null(geocode)) {
      if (num_geocodes >= i) {
        df_colours_tidied_xy[matching_rows[i], ]$x <- geocode$lon[i] 
        df_colours_tidied_xy[matching_rows[i], ]$y <- geocode$lat[i] 
      }  
    }
  }
  Sys.sleep(0.5) # so as not to over-tax the geocoder
}

Remove anything we missed

Another tidy up removing anything that didn’t get geocoded

df_colours_tidied_xy <- df_colours_tidied_xy %>%
  filter(x != 0 & y != 0)

write.csv(df_colours_tidied_xy, "dulux-colours-xy.csv", row.names = FALSE)

Making a map

Get the geocoded dataset we made earlier

dulux_colours <- read.csv("dulux-colours-xy.csv")

Make the dataframe into a sf point dataset

dulux_colours_sf <- st_as_sf(dulux_colours, 
                     coords = c("x", "y"), # the columns with the coordinates
                     crs = 4326) %>%       # the project EPSG:4326 for lng-lat
  st_transform(2193) %>% # convert to NZTM
  ## and make an RGB column
  mutate(rgb = rgb(red / 255, 
                   green / 255, 
                   blue/ 255))

st_write(dulux_colours_sf, "dulux-colours-pts.gpkg", delete_dsn = TRUE)
## Deleting source `dulux-colours-pts.gpkg' using driver `GPKG'
## Writing layer `dulux-colours-pts' to data source `dulux-colours-pts.gpkg' using driver `GPKG'
## Writing 904 features with 7 fields and geometry type Point.

And at last a map!

nz <- st_read("nz.gpkg")
## Reading layer `nz' from data source `/home/osullid3/Documents/code/dulux-colours-map/nz.gpkg' using driver `GPKG'
## Simple feature collection with 1 feature and 0 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1089972 ymin: 4748123 xmax: 2091863 ymax: 6223160
## Projected CRS: NZGD2000 / New Zealand Transverse Mercator 2000
tm_shape(nz) +
  tm_borders() +
  tm_shape(dulux_colours_sf) + 
  tm_dots(col = "rgb")

Better yet, Voronois

Points aren’t really much fun

Instead, make up Voronois and clip to NZ

dulux_colours_vor <- dulux_colours_sf %>%
  st_union() %>%
  st_voronoi() %>%
  st_cast() %>%
  st_as_sf() %>%
  st_join(dulux_colours_sf, left = FALSE) %>%
  st_intersection(st_read("nz.gpkg")) 

st_write(dulux_colours_vor, "dulux-colours-vor.gpkg", delete_dsn = TRUE)

And map it

dulux_colours_vor <- st_read("dulux-colours-vor.gpkg")
## Reading layer `dulux-colours-vor' from data source `/home/osullid3/Documents/code/dulux-colours-map/dulux-colours-vor.gpkg' using driver `GPKG'
## Simple feature collection with 902 features and 7 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 1089972 ymin: 4748123 xmax: 2091863 ymax: 6223160
## Projected CRS: NZGD2000 / New Zealand Transverse Mercator 2000
tm_shape(dulux_colours_vor) + 
  tm_polygons(col = "rgb", id = "placename", alpha = 0.75, border.col = "grey", lwd = 0.2) +
  tm_basemap("Esri.WorldTopoMap")

Credits and more